#####Replication Code for "Outcomes of Outcomes: Performance Feedback in Collaborative Watershed Management"#####

# 1. Preparing the dataset for analysis #

library(devtools)
library(haven)
library(magrittr)
library(dplyr)
library(tidyr)
library(tidyverse)
library(RcppRoll)
library(readxl)
library(sp)
library(sf)
library(terra)
library(tmap)
library(precrec) # convert shapefile to dataframe
library(usmap)
library(rnaturalearth) # world map
library(spdep)
library(splm)
library(sphet)
library(spatialreg)
library(ggplot2)
library(panelView)
library(lubridate)
library(fixest)
library(plm)
library(lmtest)
library(fect)
library(PanelMatch)
library(creditmodel)
library(lme4)
library(tseries)
library(stargazer)
library(texreg)
library(purrr) #descriptive
library(xtable) 
library(boot)

df1<-read.csv("df_adaptation.csv")

panelview(wqi ~ 1, data = df1,  
          index = c("huc8","year"), 
          pre.post = TRUE, 
          display.all = TRUE,
          by.timing = TRUE)

sapply(df1, function(x) sum(is.na(x)))
df1 <- df1 %>%
  mutate_at(c(9:121), ~replace_na(.,0))

#Creating wqi status#

df1$wqistatus <- as.factor(ifelse(df1$wqi>=90, 'Excellent', 
                                  ifelse(df1$wqi>=85, 'Good', 
                                         ifelse(df1$wqi>=80, 'Fair', 
                                                ifelse(df1$wqi>=60, 'Poor', 'Very Poor')))))

df1$wqistatus_Excellent <- ifelse(df1$wqistatus == "Excellent", 1, 0)
df1$wqistatus_Good      <- ifelse(df1$wqistatus == "Good", 1, 0)
df1$wqistatus_Fair      <- ifelse(df1$wqistatus == "Fair", 1, 0)
df1$wqistatus_Poor      <- ifelse(df1$wqistatus == "Poor", 1, 0)
df1$wqistatus_VeryPoor  <- ifelse(df1$wqistatus == "Very Poor", 1, 0)

#Creating wqi change#
df1 <- df1 %>%
  mutate(year = as.integer(year),
         huc8 = as.character(huc8),
         wqi  = as.numeric(wqi)) %>%
  group_by(huc8) %>%
  arrange(year, .by_group = TRUE) %>%
  mutate(
    # simple lag-1 difference
    owqi_change = wqi - dplyr::lag(wqi),
    
    # optional: only compute if prior row is the immediately previous year
    owqi_change_consec = if_else(year - dplyr::lag(year) == 1,
                                 wqi - dplyr::lag(wqi),
                                 NA_real_)
  ) %>%
  ungroup()

#descriptive statistics#
get_summary_stats <- function(x) {
  tibble(
    N = length(x[!is.na(x)]),  # count non-NA values
    mean = mean(x, na.rm = TRUE),
    sd = sd(x, na.rm = TRUE),
    min = min(x, na.rm = TRUE),
    max = max(x, na.rm = TRUE)
  )
}
variables <- c("projects", "projects_m", "projects_v", "total_pa", "total_pa_m", "total_pa_v", 
               "total_cost", "total_cost_m", "total_cost_v", "wqi", "wqistatus_Excellent", "wqistatus_Good", "wqistatus_Fair", "wqistatus_Poor", "wqistatus_VeryPoor",
               "diversity_pa", "diversity_pa_m", "diversity_pa_v", "lead_wc", "lead_wc_m", "lead_wc_v", "forest", "air_temp", "precipitation")

des_stat<- df1 %>%
  dplyr::select(variables) %>%
  map_df(~get_summary_stats(.x), .id = "variable")

write.csv(des_stat, "des_table.csv", row.names = FALSE)

des_table<-xtable(des_stat, digits=4)
print.xtable(des_table,type="html",file="des_table.html")
print.xtable(des_table)

#Moran
# Initialize an empty data frame to store Moran's I statistics
moran_df <- data.frame(
  year = numeric(),
  moran_I = numeric(),
  expected_I = numeric(),
  z_score = numeric(),
  p_value = numeric()
)

years <- unique(df1$year)

for(year in years) {
  
  # Subset data for the specific year and remove rows with NA
  df_year <- df1[df1$year == year,]
  df_year <- na.omit(df_year)
  
  # Check if there are any observations left after omitting NA
  if(nrow(df_year) == 0) {
    print(paste("No observations in year", year))
    next
  }
  
  # Convert df_year to SpatialPointsDataFrame
  df_year_coords <- df_year[,c("lon","lat")]
  
  # Create a distance matrix
  dist_matrix <- sp::spDists(as.matrix(df_year_coords))
  inv_dist_matrix <- 1 / dist_matrix
  diag(inv_dist_matrix) <- 0
  
  # Create a listw object from the inverse distance matrix
  listw <- mat2listw(inv_dist_matrix, style="W")
  
  # Calculate Moran's I
  test <- try(moran.test(df_year$wqi, listw), silent = TRUE)
  
  # Check if there was an error
  if(class(test) == "try-error") {
    print(paste("Error in year", year))
    next
  }
  
  # Extracting relevant statistics from the test object
  moran_I <- as.numeric(test$estimate["Moran I statistic"])
  expected_I <- as.numeric(test$estimate["Expectation"])
  z_score <- as.numeric(test$statistic)
  p_value <- as.numeric(test$p.value)
  
  # Append the row to the data frame
  moran_df <- rbind(moran_df, data.frame(year, moran_I, expected_I, z_score, p_value))
}

# Display the final table
print(moran_df)
moran_table<-xtable(moran_df, digits=2)
print.xtable(moran_table,type="html",file="moran_table.html")
print.xtable(moran_table)

write.csv(moran_table, "moran_table.csv", row.names = FALSE)

#correlations + p-values (pairwise NAs handled)
library(Hmisc)
df_use <-cbind(df1$projects,df1$total_pa,df1$total_cost)
rc <- rcorr(as.matrix(df_use), type = "pearson")
round(rc$r, 2)   # correlations
round(rc$P, 3)   # p-values

# 2. Main Analysis # 
######projects######

# Main model
mod1s<- projects ~ 
  lag(wqi) + 
  I(lag(wqi)^2) +
  lag(diversity_pa) +
  lag(lead_wc) +
  lag(W_wqi) +
  lag(forest) + 
  lag(air_temp) + 
  lag(precipitation)

m1s<-plm(mod1s,data = df1,
         index = c("huc8", "year"),
         effect = "twoways",
         model = "random")
summary(m1s)
m1sa<-plm(mod1s,data = df1,
          index = c("huc8", "year"),
          effect = "twoways",
          model = "within")
summary(m1sa)
phtest(m1s,m1sa) # FE vs RE

# Mundlak specification 
df1$wqi_mean <- ave(df1$wqi, df1$huc8)

mod1s_md <- projects ~ 
  lag(wqi) + 
  lag(wqi_mean) +
  I(lag(wqi)^2) +
  I(lag(wqi_mean)^2) +
  lag(diversity_pa) +
  lag(lead_wc) +
  lag(W_wqi) +
  lag(forest) + 
  lag(air_temp) + 
  lag(precipitation)

m1s_md<-plm(mod1s_md,data = df1,
         index = c("huc8", "year"),
         effect = "twoways",
         model = "random")
summary(m1s_md)

# Categorical status
df1$wqistatus <- relevel(factor(df1$wqistatus), ref = "Very Poor")

mod1s_c<- projects ~ 
  lag(wqistatus) +
  lag(diversity_pa) +
  lag(lead_wc) +
  lag(W_wqi) +
  lag(forest) + 
  lag(air_temp) + 
  lag(precipitation)

m1s_c<-plm(mod1s_c,data = df1,
           index = c("huc8", "year"),
           effect = "twoways",
           model = "random")
summary(m1s_c)
m1sa_c<-plm(mod1s_c,data = df1,
            index = c("huc8", "year"),
            effect = "twoways",
            model = "within")
summary(m1sa_c)
phtest(m1s_c,m1sa_c) # FE vs RE

install.packages("car")
library(car)
linearHypothesis(m1s_c,"lag(wqistatus)Poor = lag(wqistatus)Fair")
linearHypothesis(m1s_c,"lag(wqistatus)Good = lag(wqistatus)Excellent")

# 2-year lags
mod1s_l<- projects ~ 
  lag(wqi,2) + 
  I(lag(wqi,2)^2) +
  lag(diversity_pa) +
  lag(lead_wc) +
  lag(W_wqi) +
  lag(forest) + 
  lag(air_temp) + 
  lag(precipitation)

m1s_l<-plm(mod1s_l,data = df1,
         index = c("huc8", "year"),
         effect = "twoways",
         model = "random")
summary(m1s_l)
m1sa_l<-plm(mod1s_l,data = df1,
          index = c("huc8", "year"),
          effect = "twoways",
          model = "within")
summary(m1sa_l)
phtest(m1s_l,m1sa_l) # FE vs RE

######participants######
# Main model
mod2s<- total_pa ~
  lag(wqi) + 
  I(lag(wqi)^2) +
  lag(diversity_pa) +
  lag(lead_wc) +
  lag(W_wqi) +
  lag(forest) + 
  lag(air_temp) + 
  lag(precipitation) +
  projects

m2s<-plm(mod2s,data = df1,
         index = c("huc8", "year"),
         effect = "twoways",
         model = "random")
summary(m2s)
m2sa<-plm(mod2s,data = df1,
          index = c("huc8", "year"),
          effect = "twoways",
          model = "within")
summary(m2sa)
phtest(m2s,m2sa) # FE vs RE

# Mundlak specification
mod2s_md<- total_pa ~
  lag(wqi) + 
  lag(wqi_mean) +
  I(lag(wqi)^2) +
  I(lag(wqi_mean)^2) +
  lag(diversity_pa) +
  lag(lead_wc) +
  lag(W_wqi) +
  lag(forest) + 
  lag(air_temp) + 
  lag(precipitation) +
  projects

m2s_md<-plm(mod2s_md,data = df1,
         index = c("huc8", "year"),
         effect = "twoways",
         model = "random")
summary(m2s_md)

# Categorical status
mod2s_c<- total_pa ~ 
  lag(wqistatus) +
  lag(diversity_pa) +
  lag(lead_wc) +
  lag(W_wqi) +
  lag(forest) + 
  lag(air_temp) + 
  lag(precipitation) +
  projects

m2s_c<-plm(mod2s_c,data = df1,
           index = c("huc8", "year"),
           effect = "twoways",
           model = "random")
summary(m2s_c)
m2sa_c<-plm(mod2s_c,data = df1,
            index = c("huc8", "year"),
            effect = "twoways",
            model = "within")
summary(m2sa_c)
phtest(m2s_c,m2sa_c) # FE vs RE

# 2-year lags
mod2s_l<- total_pa ~
  lag(wqi,2) + 
  I(lag(wqi,2)^2) +
  lag(diversity_pa) +
  lag(lead_wc) +
  lag(W_wqi) +
  lag(forest) + 
  lag(air_temp) + 
  lag(precipitation) +
  projects

m2s_l<-plm(mod2s_l,data = df1,
         index = c("huc8", "year"),
         effect = "twoways",
         model = "random")
summary(m2s_l)
m2sa_l<-plm(mod2s_l,data = df1,
          index = c("huc8", "year"),
          effect = "twoways",
          model = "within")
summary(m2sa_l)
phtest(m2s_l,m2sa_l) # FE vs RE

######total expenditures######
# Main model
mod3s<- log(total_cost+1) ~ 
  lag(wqi) + 
  I(lag(wqi)^2) +
  lag(diversity_pa) +
  lag(lead_wc) +
  lag(W_wqi) +
  lag(forest) + 
  lag(air_temp) + 
  lag(precipitation) +
  projects

m3s<-plm(mod3s,data = df1,
         index = c("huc8", "year"),
         effect = "twoways",
         model = "random")
summary(m3s)
m3sa<-plm(mod3s,data = df1,
          index = c("huc8", "year"),
          effect = "twoways",
          model = "within")
summary(m3sa)
phtest(m3s,m3sa) # FE vs RE

# Mundlak specification
mod3s_md<- log(total_cost+1) ~ 
  lag(wqi) + 
  lag(wqi_mean) + 
  I(lag(wqi)^2) +
  I(lag(wqi_mean)^2) +
  lag(diversity_pa) +
  lag(lead_wc) +
  lag(W_wqi) +
  lag(forest) + 
  lag(air_temp) + 
  lag(precipitation) +
  projects

m3s_md<-plm(mod3s_md,data = df1,
         index = c("huc8", "year"),
         effect = "twoways",
         model = "random")
summary(m3s_md)

# Categorical status
mod3s_c<- log(total_cost+1) ~ 
  lag(wqistatus) +
  lag(diversity_pa) +
  lag(lead_wc) +
  lag(W_wqi) +
  lag(forest) + 
  lag(air_temp) + 
  lag(precipitation) + 
  projects

m3s_c<-plm(mod3s_c,data = df1,
           index = c("huc8", "year"),
           effect = "twoways",
           model = "random")
summary(m3s_c)
m3sa_c<-plm(mod3s_c,data = df1,
            index = c("huc8", "year"),
            effect = "twoways",
            model = "within")
summary(m3sa_c)
phtest(m3s_c,m3sa_c) # FE vs RE

# 2-year lags
mod3s_l<- log(total_cost+1) ~ 
  lag(wqi,2) + 
  I(lag(wqi,2)^2) +
  lag(diversity_pa) +
  lag(lead_wc) +
  lag(W_wqi) +
  lag(forest) + 
  lag(air_temp) + 
  lag(precipitation) +
  projects

m3s_l<-plm(mod3s_l,data = df1,
         index = c("huc8", "year"),
         effect = "twoways",
         model = "random")
summary(m3s_l)
m3sa_l<-plm(mod3s_l,data = df1,
          index = c("huc8", "year"),
          effect = "twoways",
          model = "within")
summary(m3sa_l)
phtest(m3s_l,m3sa_l) # FE vs RE

# 3. Subgroup analysis #
######OWEB funded-mandatory#####
######projects#####
mod1sm<- projects_m ~ 
  lag(wqi) + 
  I(lag(wqi)^2) +
  lag(diversity_pa) +
  lag(lead_wc) +
  lag(W_wqi) +
  lag(forest) + 
  lag(air_temp) + 
  lag(precipitation)

m1sm<-plm(mod1sm,data = df1,
          index = c("huc8", "year"),
          effect = "twoways",
          model = "random")
summary(m1sm)
m1sam<-plm(mod1sm,data = df1,
           index = c("huc8", "year"),
           effect = "twoways",
           model = "within")
summary(m1sam)
phtest(m1sm,m1sam) # FE vs RE

mod1sm_c<- projects_m ~ 
  lag(wqistatus) +
  lag(diversity_pa) +
  lag(lead_wc) +
  lag(W_wqi) +
  lag(forest) + 
  lag(air_temp) + 
  lag(precipitation)

m1sm_c<-plm(mod1sm_c,data = df1,
            index = c("huc8", "year"),
            effect = "twoways",
            model = "random")
summary(m1sm_c)
m1sam_c<-plm(mod1sm_c,data = df1,
             index = c("huc8", "year"),
             effect = "twoways",
             model = "within")
summary(m1sam_c)
phtest(m1sm_c,m1sam_c) # FE vs RE

######participant######
mod2sm<- total_pa_m ~
  lag(wqi) + 
  I(lag(wqi)^2) +
  lag(diversity_pa) +
  lag(lead_wc) +
  lag(W_wqi) +
  lag(forest) + 
  lag(air_temp) + 
  lag(precipitation) +
  projects_m

m2sm<-plm(mod2sm,data = df1,
          index = c("huc8", "year"),
          effect = "twoways",
          model = "random")
summary(m2sm)
m2sam<-plm(mod2sm,data = df1,
           index = c("huc8", "year"),
           effect = "twoways",
           model = "within")
summary(m2sam)
phtest(m2sm,m2sam) # FE vs RE

mod2sm_c<- total_pa_m ~ 
  lag(wqistatus) +
  lag(diversity_pa) +
  lag(lead_wc) +
  lag(W_wqi) +
  lag(forest) + 
  lag(air_temp) + 
  lag(precipitation) +
  projects_m

m2sm_c<-plm(mod2sm_c,data = df1,
            index = c("huc8", "year"),
            effect = "twoways",
            model = "random")
summary(m2sm_c)
m2sam_c<-plm(mod2sm_c,data = df1,
             index = c("huc8", "year"),
             effect = "twoways",
             model = "within")
summary(m2sam_c)
phtest(m2sm_c,m2sam_c) # FE vs RE

######total expenditures######
mod3sm<- log(total_cost_m+1) ~ 
  lag(wqi) + 
  I(lag(wqi)^2) +
  lag(diversity_pa) +
  lag(lead_wc) +
  lag(W_wqi) +
  lag(forest) + 
  lag(air_temp) + 
  lag(precipitation) +
  projects_m

m3sm<-plm(mod3sm,data = df1,
          index = c("huc8", "year"),
          effect = "twoways",
          model = "random")
summary(m3sm)
m3sam<-plm(mod3sm,data = df1,
           index = c("huc8", "year"),
           effect = "twoways",
           model = "within")
summary(m3sam)
phtest(m3sm,m3sam) # FE vs RE

mod3sm_c<- log(total_cost_m+1) ~ 
  lag(wqistatus) +
  lag(diversity_pa) +
  lag(lead_wc) +
  lag(W_wqi) +
  lag(forest) + 
  lag(air_temp) + 
  lag(precipitation) +
  projects_m

m3sm_c<-plm(mod3sm_c,data = df1,
            index = c("huc8", "year"),
            effect = "twoways",
            model = "random")
summary(m3sm_c)
m3sam_c<-plm(mod3sm_c,data = df1,
             index = c("huc8", "year"),
             effect = "twoways",
             model = "within")
summary(m3sam_c)
phtest(m3sm_c,m3sam_c) # FE vs RE

######non OWEB funded-voluntary#####
######projects######
mod1sv<- projects_v ~ 
  lag(wqi) + 
  I(lag(wqi)^2) +
  lag(diversity_pa) +
  lag(lead_wc) +
  lag(W_wqi) +
  lag(forest) + 
  lag(air_temp) + 
  lag(precipitation)

m1sv<-plm(mod1sv,data = df1,
          index = c("huc8", "year"),
          effect = "twoways",
          model = "random")
summary(m1sv)
m1sav<-plm(mod1sv,data = df1,
           index = c("huc8", "year"),
           effect = "twoways",
           model = "within")
summary(m1sav)
phtest(m1sv,m1sav) # FE vs RE

mod1sv_c<- projects_v ~ 
  lag(wqistatus) +
  lag(diversity_pa) +
  lag(lead_wc) +
  lag(W_wqi) +
  lag(forest) + 
  lag(air_temp) + 
  lag(precipitation)

m1sv_c<-plm(mod1sv_c,data = df1,
            index = c("huc8", "year"),
            effect = "twoways",
            model = "random")
summary(m1sv_c)
m1sav_c<-plm(mod1sv_c,data = df1,
             index = c("huc8", "year"),
             effect = "twoways",
             model = "within")
summary(m1sav_c)
phtest(m1sv_c,m1sav_c) # FE vs RE

mod1sv_d<- projects_v ~ 
  lag(owqi_change) + 
  I(lag(owqi_change)^2) +
  lag(diversity_pa) +
  lag(lead_wc) +
  lag(W_wqi) +
  lag(forest) + 
  lag(air_temp) + 
  lag(precipitation)

m1sv_d<-plm(mod1sv_d,data = df1,
            index = c("huc8", "year"),
            effect = "twoways",
            model = "random")
summary(m1sv_d)
m1sav_d<-plm(mod1sv_d,data = df1,
             index = c("huc8", "year"),
             effect = "twoways",
             model = "within")
summary(m1sav_d)
phtest(m1sv_d,m1sav_d) # FE vs RE

######participants######
mod2sv<- total_pa_v ~
  lag(wqi) + 
  I(lag(wqi)^2) +
  lag(diversity_pa) +
  lag(lead_wc) +
  lag(W_wqi) +
  lag(forest) + 
  lag(air_temp) + 
  lag(precipitation) +
  projects_v

m2sv<-plm(mod2sv,data = df1,
          index = c("huc8", "year"),
          effect = "twoways",
          model = "random")
summary(m2sv)
m2sav<-plm(mod2sv,data = df1,
           index = c("huc8", "year"),
           effect = "twoways",
           model = "within")
summary(m2sav)
phtest(m2sv,m2sav) # FE vs RE

mod2sv_c<- total_pa_v ~ 
  lag(wqistatus) +
  lag(diversity_pa) +
  lag(lead_wc) +
  lag(W_wqi) +
  lag(forest) + 
  lag(air_temp) + 
  lag(precipitation) +
  projects_v

m2sv_c<-plm(mod2sv_c,data = df1,
            index = c("huc8", "year"),
            effect = "twoways",
            model = "random")
summary(m2sv_c)
m2sav_c<-plm(mod2sv_c,data = df1,
             index = c("huc8", "year"),
             effect = "twoways",
             model = "within")
summary(m2sav_c)
phtest(m2sv_c,m2sav_c) # FE vs RE

######total expenditures######
mod3sv<- log(cash_v+1) ~ 
  lag(wqi) + 
  I(lag(wqi)^2) +
  lag(diversity_pa) +
  lag(lead_wc) +
  lag(W_wqi) +
  lag(forest) + 
  lag(air_temp) + 
  lag(precipitation) +
  projects_v

m3sv<-plm(mod3sv,data = df1,
          index = c("huc8", "year"),
          effect = "twoways",
          model = "random")
summary(m3sv)
m3sav<-plm(mod3sv,data = df1,
           index = c("huc8", "year"),
           effect = "twoways",
           model = "within")
summary(m3sav)
phtest(m3sv,m3sav) # FE vs RE

mod3sv_c<- log(inkind_v+1) ~ 
  lag(wqistatus) +
  lag(diversity_pa) +
  lag(lead_wc) +
  lag(W_wqi) +
  lag(forest) + 
  lag(air_temp) + 
  lag(precipitation) +
  projects_v

m3sv_c<-plm(mod3sv_c,data = df1,
            index = c("huc8", "year"),
            effect = "twoways",
            model = "random")
summary(m3sv_c)
m3sav_c<-plm(mod3sv_c,data = df1,
             index = c("huc8", "year"),
             effect = "twoways",
             model = "within")
summary(m3sav_c)
phtest(m3sv_c,m3sav_c) # FE vs RE

#####table####
wordreg(list(m1s,m2s,m3s), 
        file = "main_combined.doc",
        digits = 4,
        custom.model.names = c("(1)","(2)","(3)"),
        custom.coef.map = list("lag(wqi)" = "OWQI",
                               "I(lag(wqi)^2)" = "OWQI Squared",
                               "lag(diversity_pa)" = "Participant Diversity",
                               "lag(lead_wc)" = "Watershed Council as Lead Organization",
                               "lag(forest)" = "Forest Land Use (%)",
                               "lag(air_temp)" = "Temperature (°C)",
                               "lag(precipitation)" = "Precipitation (mm)",
                               "lag(W_wqi)" = "Spatial Lag",
                               "projects" = "Number of Projects",
                               "(Intercept)" = "Constant"))

wordreg(list(m1sa,m2sa,m3sa), 
        file = "main_combined_fixed.doc",
        digits = 4,
        custom.model.names = c("(1)","(2)","(3)"),
        custom.coef.map = list("lag(wqi)" = "OWQI",
                               "I(lag(wqi)^2)" = "OWQI Squared",
                               "lag(diversity_pa)" = "Participant Diversity",
                               "lag(lead_wc)" = "Watershed Council as Lead Organization",
                               "lag(forest)" = "Forest Land Use (%)",
                               "lag(air_temp)" = "Temperature (°C)",
                               "lag(precipitation)" = "Precipitation (mm)",
                               "lag(W_wqi)" = "Spatial Lag",
                               "projects" = "Number of Projects",
                               "(Intercept)" = "Constant"))

wordreg(list(m1s_md,m2s_md,m3s_md), 
        file = "main_combined_mundlak.doc",
        digits = 4,
        custom.model.names = c("(1)","(2)","(3)"),
        custom.coef.map = list("lag(wqi)" = "OWQI",
                               "I(lag(wqi_mean)^2)" = "OWQI Squared",
                               "lag(wqi_mean)" = "OWQI Mean",
                               "I(lag(wqi)^2)" = "OWQI Mean Squared",
                               "lag(diversity_pa)" = "Participant Diversity",
                               "lag(lead_wc)" = "Watershed Council as Lead Organization",
                               "lag(forest)" = "Forest Land Use (%)",
                               "lag(air_temp)" = "Temperature (°C)",
                               "lag(precipitation)" = "Precipitation (mm)",
                               "lag(W_wqi)" = "Spatial Lag",
                               "projects" = "Number of Projects",
                               "(Intercept)" = "Constant"))

wordreg(list(m1s_c,m2s_c,m3s_c), 
        file = "main_combined_categorical.doc",
        digits = 4,
        custom.model.names = c("(1)","(2)","(3)"),
        custom.coef.map = list("lag(wqistatus)Excellent" = "Excellent",
                               "lag(wqistatus)Fair" = "Fair",
                               "lag(wqistatus)Good" = "Good",
                               "lag(wqistatus)Poor" = "Poor",
                               "lag(diversity_pa)" = "Participant Diversity",
                               "lag(lead_wc)" = "Watershed Council as Lead Organization",
                               "lag(forest)" = "Forest Land Use (%)",
                               "lag(air_temp)" = "Temperature (°C)",
                               "lag(precipitation)" = "Precipitation (mm)",
                               "lag(W_wqi)" = "Spatial Lag",
                               "projects" = "Number of Projects",
                               "(Intercept)" = "Constant"))

wordreg(list(m1s_l,m2s_l,m3s_l), 
        file = "main_combined_lag2.doc",
        digits = 4,
        custom.model.names = c("(1)","(2)","(3)"),
        custom.coef.map = list("lag(wqi,2)" = "OWQI",
                               "I(lag(wqi_mean,2)^2)" = "OWQI Squared",
                               "lag(diversity_pa)" = "Participant Diversity",
                               "lag(lead_wc)" = "Watershed Council as Lead Organization",
                               "lag(forest)" = "Forest Land Use (%)",
                               "lag(air_temp)" = "Temperature (°C)",
                               "lag(precipitation)" = "Precipitation (mm)",
                               "lag(W_wqi)" = "Spatial Lag",
                               "projects" = "Number of Projects",
                               "(Intercept)" = "Constant"))

wordreg(list(m1sm,m2sm,m3sm), 
        file = "sub_mandatory.doc",
        digits = 4,
        custom.model.names = c("(1)","(2)","(3)"),
        custom.coef.map = list("lag(wqi)" = "OWQI",
                               "I(lag(wqi)^2)" = "OWQI Squared",
                               "lag(diversity_pa)" = "Participant Diversity",
                               "lag(lead_wc)" = "Watershed Council as Lead Organization",
                               "lag(forest)" = "Forest Land Use (%)",
                               "lag(air_temp)" = "Temperature (°C)",
                               "lag(precipitation)" = "Precipitation (mm)",
                               "lag(W_wqi)" = "Spatial Lag",
                               "projects_m" = "Number of Projects",
                               "(Intercept)" = "Constant"))

wordreg(list(m1sm_c,m2sm_c,m3sm_c), 
        file = "sub_mandatory_categorical.doc",
        digits = 4,
        custom.model.names = c("(1)","(2)","(3)"),
        custom.coef.map = list("lag(wqistatus)Excellent" = "Excellent",
                               "lag(wqistatus)Fair" = "Fair",
                               "lag(wqistatus)Good" = "Good",
                               "lag(wqistatus)Poor" = "Poor",
                               "lag(diversity_pa)" = "Participant Diversity",
                               "lag(lead_wc)" = "Watershed Council as Lead Organization",
                               "lag(forest)" = "Forest Land Use (%)",
                               "lag(air_temp)" = "Temperature (°C)",
                               "lag(precipitation)" = "Precipitation (mm)",
                               "lag(W_wqi)" = "Spatial Lag",
                               "projects_m" = "Number of Projects",
                               "(Intercept)" = "Constant"))

wordreg(list(m1sv,m2sv,m3sv), 
        file = "sub_voluntary.doc",
        digits = 4,
        custom.model.names = c("(1)","(2)","(3)"),
        custom.coef.map = list("lag(wqi)" = "OWQI",
                               "I(lag(wqi)^2)" = "OWQI Squared",
                               "lag(diversity_pa)" = "Participant Diversity",
                               "lag(lead_wc)" = "Watershed Council as Lead Organization",
                               "lag(forest)" = "Forest Land Use (%)",
                               "lag(air_temp)" = "Temperature (°C)",
                               "lag(precipitation)" = "Precipitation (mm)",
                               "lag(W_wqi)" = "Spatial Lag",
                               "projects_v" = "Number of Projects",
                               "(Intercept)" = "Constant"))

wordreg(list(m1sv_c,m2sv_c,m3sv_c), 
        file = "sub_voluntary_categorical.doc",
        digits = 4,
        custom.model.names = c("(1)","(2)","(3)"),
        custom.coef.map = list("lag(wqistatus)Excellent" = "Excellent",
                               "lag(wqistatus)Fair" = "Fair",
                               "lag(wqistatus)Good" = "Good",
                               "lag(wqistatus)Poor" = "Poor",
                               "lag(diversity_pa)" = "Participant Diversity",
                               "lag(lead_wc)" = "Watershed Council as Lead Organization",
                               "lag(forest)" = "Forest Land Use (%)",
                               "lag(air_temp)" = "Temperature (°C)",
                               "lag(precipitation)" = "Precipitation (mm)",
                               "lag(W_wqi)" = "Spatial Lag",
                               "projects_v" = "Number of Projects",
                               "(Intercept)" = "Constant"))

# 4. Figures

# 1. Annual WQI trends by HUC8's mean WQI
library(dplyr)
library(ggplot2)
library(forcats)

df1_clean <- df1 %>%
  transmute(
    year = as.integer(year),
    huc8 = as.character(huc8),
    wqi  = pmin(pmax(as.numeric(wqi), 0), 100)
  ) %>%
  filter(!is.na(year), !is.na(huc8), !is.na(wqi), dplyr::between(year, 1997, 2023))

huc8_year <- df1_clean %>%
  group_by(huc8, year) %>%
  summarise(wqi_huc8_year = mean(wqi, na.rm = TRUE), .groups = "drop")

huc8_firstyear <- huc8_year %>%
  group_by(huc8) %>%
  arrange(year, .by_group = TRUE) %>%
  slice_min(order_by = year, with_ties = FALSE) %>%  # earliest non-missing year
  ungroup() %>%
  transmute(
    huc8,
    first_year = year,
    first_wqi  = wqi_huc8_year
  ) %>%
  mutate(group = cut(
    first_wqi,
    breaks = c(10, 60, 80, 85, 90, 100.0001),  # [10,60), [60,80), [80,85), [85,90), [90,100]
    right  = FALSE,
    labels = c("Very poor (10–59)", "Poor (60–79)", "Fair (80–84)",
               "Good (85–89)", "Excellent (90–100)")
  )) %>%
  mutate(group = fct_relevel(group,
                             "Excellent (90–100)", "Good (85–89)",
                             "Fair (80–84)", "Poor (60–79)", "Very poor (10–59)"))

group_annual_stats <- huc8_year %>%
  left_join(huc8_firstyear, by = "huc8") %>%
  filter(!is.na(group)) %>%
  group_by(group, year) %>%
  summarise(
    mean_wqi = mean(wqi_huc8_year, na.rm = TRUE),
    sd_wqi   = sd(wqi_huc8_year,   na.rm = TRUE),
    n_huc8   = dplyr::n(),
    .groups  = "drop"
  ) %>%
  mutate(lo = pmax(mean_wqi - sd_wqi, 0),
         hi = pmin(mean_wqi + sd_wqi, 100))

cols <- c(
  "Excellent (90–100)" = "#2B8CBE",
  "Good (85–89)"       = "#1a9850",
  "Fair (80–84)"       = "#FFC107",
  "Poor (60–79)"       = "#fc8d59",
  "Very poor (10–59)"  = "#d73027"
)

f_1 <- ggplot(group_annual_stats, aes(x = year, y = mean_wqi)) +
  # slightly stronger fill for visibility
  geom_ribbon(aes(ymin = lo, ymax = hi, fill = group),
              alpha = 0.28, color = NA) +
  # thin outline to make band edges crisp (but still subtle)
  geom_ribbon(aes(ymin = lo, ymax = hi, color = group),
              fill = NA, linewidth = 0.45, alpha = 0.8) +
  # mean lines + points
  geom_line(aes(color = group), linewidth = 1.1) +
  geom_point(aes(color = group), size = 1.8) +
  scale_x_continuous(breaks = seq(1997, 2023, 2), limits = c(1997, 2023)) +
  scale_y_continuous(breaks = seq(0, 100, 10), limits = c(0, 100)) +
  scale_color_manual(values = cols, name = "First-year OWQI group") +
  scale_fill_manual(values = cols, guide = "none") +
  labs(x = "Year", y = "OWQI (0–100)") +
  theme_minimal(base_size = 12) +
  theme(legend.position = "right")

ggsave("wqi_firstyear_group_mean_sd.png", f_1, width = 8, height = 5, dpi = 300)

library(plm)
library(dplyr)
library(tibble)
library(ggplot2)

s <- summary(m1s_c)

if(!is.null(s$coefficients) && ncol(s$coefficients) >= 2){
  coefmat <- as.data.frame(s$coefficients)
  # common colnames: Estimate, Std. Error, t value, Pr(>|t|)
  # normalize names:
  names(coefmat)[1:4] <- c("estimate","se","stat","p")
  coef_tidy <- coefmat %>%
    tibble::rownames_to_column("term") %>%
    dplyr::select(term, estimate, se, stat, p)
} else {
  # fallback: use model coef() and model vcov() (conventional)
  est_vec <- coef(m1s_c)
  vcov_mat <- vcov(m1s_c)                      # model-based covariance
  se_vec <- sqrt(diag(vcov_mat))
  coef_tidy <- tibble(
    term = names(est_vec),
    estimate = as.numeric(est_vec),
    se = se_vec[names(est_vec)]
  )
}

coef_tidy <- coef_tidy %>%
  mutate(lower = estimate - 1.96 * se,
         upper = estimate + 1.96 * se)

print(coef_tidy$term)

coef_wq <- coef_tidy %>%
  filter(grepl("wqistatus", term, ignore.case = TRUE)) %>%
  mutate(
    level = sub(".*wqistatus\\)?[\\._: ]*", "", term, ignore.case = TRUE),
    level = gsub("^lag\\(|\\)$", "", level),
    level = trimws(level)
  )

desired_order <- c("Poor", "Fair", "Good", "Excellent")
coef_wq <- coef_wq %>%
  filter(level %in% desired_order) %>%
  mutate(level = factor(level, levels = desired_order))

coef_wq_ext <- coef_wq %>%
  # add the reference group manually
  bind_rows(
    data.frame(
      term = "Reference",
      estimate = 0,
      se = NA,
      lower = NA,
      upper = NA,
      level = "Very Poor"
    )
  )

coef_wq_ext$level <- factor(
  coef_wq_ext$level,
  levels = c("Very Poor", "Poor", "Fair", "Good", "Excellent")
)

status_colors <- c(
  "Excellent" = "#2B8CBE",
  "Good"      = "#1a9850",
  "Fair"      = "#FFC107",
  "Poor"      = "#fc8d59",
  "Very Poor" = "#d73027"
)

f_2a <- ggplot(coef_wq_ext, aes(x = level, y = estimate, color = level)) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "gray50") +
  geom_line(aes(group = 1), color = "black", linewidth = 0.6) +
  geom_point(size = 3) +
  geom_errorbar(
    data = subset(coef_wq_ext, !is.na(lower)),
    aes(ymin = lower, ymax = upper),
    width = 0.15,
    linewidth = 0.8
  ) +
  scale_color_manual(values = status_colors) +
  labs(
    x = "Water Quality Status",
    y = "Coefficient",
    title = "Projects"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold"),
    legend.position = "none"
  )

f_2a <- f_2a +
  scale_x_discrete(
    labels = c(
      "Very Poor" = "Very Poor (10–59)",
      "Poor"      = "Poor (60–79)",
      "Fair"      = "Fair (80–84)",
      "Good"      = "Good (85–89)",
      "Excellent" = "Excellent (90–100)"
    )
  )

ggsave("coefficient_plot_projects.png", f_2a, width = 8, height = 5, dpi = 300)

# Participants
s2 <- summary(m2s_c)

if(!is.null(s2$coefficients) && ncol(s2$coefficients) >= 2){
  coefmat <- as.data.frame(s2$coefficients)
  names(coefmat)[1:4] <- c("estimate","se","stat","p")
  coef_tidy2 <- coefmat %>%
    tibble::rownames_to_column("term") %>%
    dplyr::select(term, estimate, se, stat, p)
} else {
  est_vec <- coef(m2s_c)
  vcov_mat <- vcov(m2s_c)
  se_vec <- sqrt(diag(vcov_mat))
  coef_tidy2 <- tibble(
    term = names(est_vec),
    estimate = as.numeric(est_vec),
    se = se_vec[names(est_vec)]
  )
}

coef_tidy2 <- coef_tidy2 %>%
  mutate(
    lower = estimate - 1.96 * se,
    upper = estimate + 1.96 * se
  )

coef_wq2 <- coef_tidy2 %>%
  filter(grepl("wqistatus", term, ignore.case = TRUE)) %>%
  mutate(
    level = sub(".*wqistatus\\)?[\\._: ]*", "", term, ignore.case = TRUE),
    level = gsub("^lag\\(|\\)$", "", level),
    level = trimws(level)
  ) %>%
  filter(level %in% c("Poor", "Fair", "Good", "Excellent")) %>%
  mutate(level = factor(level,
                        levels = c("Poor", "Fair", "Good", "Excellent")))

coef_wq_ext2 <- coef_wq2 %>%
  bind_rows(
    data.frame(
      term = "Reference",
      estimate = 0,
      se = NA,
      lower = NA,
      upper = NA,
      level = "Very Poor"
    )
  )

coef_wq_ext2$level <- factor(
  coef_wq_ext2$level,
  levels = c("Very Poor", "Poor", "Fair", "Good", "Excellent")
)

# Custom color palette
status_colors <- c(
  "Excellent" = "#2B8CBE",
  "Good"      = "#1a9850",
  "Fair"      = "#FFC107",
  "Poor"      = "#fc8d59",
  "Very Poor" = "#d73027"
)

f_2b <- ggplot(coef_wq_ext2, aes(x = level, y = estimate, color = level)) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "gray50") +
  geom_line(aes(group = 1), color = "black", linewidth = 0.6) +
  geom_point(size = 3) +
  geom_errorbar(
    data = subset(coef_wq_ext2, !is.na(lower)),
    aes(ymin = lower, ymax = upper),
    width = 0.15,
    linewidth = 0.8
  ) +
  scale_color_manual(values = status_colors) +
  scale_x_discrete(
    labels = c(
      "Very Poor" = "Very Poor (10–59)",
      "Poor"      = "Poor (60–79)",
      "Fair"      = "Fair (80–84)",
      "Good"      = "Good (85–89)",
      "Excellent" = "Excellent (90–100)"
    )
  ) +
  labs(
    x = "Water Quality Status",
    y = "Coefficient",
    title = "Participants"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold"),
    legend.position = "none"
  )

ggsave("coefficient_plot_participants.png", f_2b,
       width = 8, height = 5, dpi = 300)

#Total expenditures
s3 <- summary(m3s_c)

if(!is.null(s3$coefficients) && ncol(s3$coefficients) >= 2){
  coefmat3 <- as.data.frame(s3$coefficients)
  names(coefmat3)[1:4] <- c("estimate","se","stat","p")[1:min(4, ncol(coefmat3))]
  coef_tidy3 <- coefmat3 %>%
    tibble::rownames_to_column("term") %>%
    dplyr::select(term, estimate, se, dplyr::everything())
} else {
  # fallback: model coef() and conventional vcov
  est_vec3 <- coef(m3s_m)
  vcov_mat3 <- tryCatch(vcov(m3s_m), error = function(e) NULL)
  if(is.null(vcov_mat3)) stop("Could not extract vcov for m3s_m.")
  se_vec3 <- sqrt(diag(vcov_mat3))
  coef_tidy3 <- tibble(
    term = names(est_vec3),
    estimate = as.numeric(est_vec3),
    se = se_vec3[names(est_vec3)]
  )
}

coef_tidy3 <- coef_tidy3 %>%
  mutate(lower = estimate - 1.96 * se,
         upper = estimate + 1.96 * se)

coef_wq3 <- coef_tidy3 %>%
  filter(grepl("wqistatus", term, ignore.case = TRUE)) %>%
  mutate(
    level = sub(".*wqistatus\\)?[\\._: ]*", "", term, ignore.case = TRUE),
    level = gsub("^lag\\(|\\)$", "", level),
    level = trimws(level)
  ) %>%
  filter(level %in% c("Poor", "Fair", "Good", "Excellent")) %>%
  mutate(level = factor(level, levels = c("Poor", "Fair", "Good", "Excellent")))

if(nrow(coef_wq3) == 0) stop("No wqistatus terms found in m3s_m. Run print(coef_tidy3$term) to inspect names.")

coef_wq_ext3 <- coef_wq3 %>%
  bind_rows(
    tibble(
      term = "Reference",
      estimate = 0,
      se = NA_real_,
      lower = NA_real_,
      upper = NA_real_,
      level = "Very Poor"
    )
  )

coef_wq_ext3$level <- factor(coef_wq_ext3$level,
                             levels = c("Very Poor", "Poor", "Fair", "Good", "Excellent"))

status_colors <- c(
  "Excellent" = "#2B8CBE",
  "Good"      = "#1a9850",
  "Fair"      = "#FFC107",
  "Poor"      = "#fc8d59",
  "Very Poor" = "#d73027"
)

f_2c <- ggplot(coef_wq_ext3, aes(x = level, y = estimate, color = level)) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "gray50") +
  geom_line(aes(group = 1), color = "black", linewidth = 0.6) +
  geom_point(size = 3) +
  geom_errorbar(
    data = subset(coef_wq_ext3, !is.na(lower)),
    aes(ymin = lower, ymax = upper),
    width = 0.15,
    linewidth = 0.8
  ) +
  scale_color_manual(values = status_colors) +
  scale_x_discrete(
    labels = c(
      "Very Poor" = "Very Poor (10–59)",
      "Poor"      = "Poor (60–79)",
      "Fair"      = "Fair (80–84)",
      "Good"      = "Good (85–89)",
      "Excellent" = "Excellent (90–100)"
    )
  ) +
  labs(
    x = "Water Quality Status",
    y = "Coefficient",
    title = "Total Expenditures"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold"),
    legend.position = "none"
  )

ggsave("coefficient_plot_expenditures.png", f_2c, width = 8, height = 5, dpi = 300)
f_2c

# Figure 3

# Mandatory - Projects 
s1m <- summary(m1sm_c)

if(!is.null(s1m$coefficients) && ncol(s1m$coefficients) >= 2){
  coefmat3 <- as.data.frame(s1m$coefficients)
  names(coefmat3)[1:4] <- c("estimate","se","stat","p")[1:min(4, ncol(coefmat3))]
  coef_tidy3 <- coefmat3 %>%
    tibble::rownames_to_column("term") %>%
    dplyr::select(term, estimate, se, dplyr::everything())
} else {
  # fallback: model coef() and conventional vcov
  est_vec3 <- coef(m1sm_c)
  vcov_mat3 <- tryCatch(vcov(m1sm_c), error = function(e) NULL)
  if(is.null(vcov_mat3)) stop("Could not extract vcov for m3s_m.")
  se_vec3 <- sqrt(diag(vcov_mat3))
  coef_tidy3 <- tibble(
    term = names(est_vec3),
    estimate = as.numeric(est_vec3),
    se = se_vec3[names(est_vec3)]
  )
}

coef_tidy3 <- coef_tidy3 %>%
  mutate(lower = estimate - 1.96 * se,
         upper = estimate + 1.96 * se)

coef_wq3 <- coef_tidy3 %>%
  filter(grepl("wqistatus", term, ignore.case = TRUE)) %>%
  mutate(
    level = sub(".*wqistatus\\)?[\\._: ]*", "", term, ignore.case = TRUE),
    level = gsub("^lag\\(|\\)$", "", level),
    level = trimws(level)
  ) %>%
  filter(level %in% c("Poor", "Fair", "Good", "Excellent")) %>%
  mutate(level = factor(level, levels = c("Poor", "Fair", "Good", "Excellent")))

if(nrow(coef_wq3) == 0) stop("No wqistatus terms found in m3s_m. Run print(coef_tidy3$term) to inspect names.")

coef_wq_ext3 <- coef_wq3 %>%
  bind_rows(
    tibble(
      term = "Reference",
      estimate = 0,
      se = NA_real_,
      lower = NA_real_,
      upper = NA_real_,
      level = "Very Poor"
    )
  )

coef_wq_ext3$level <- factor(coef_wq_ext3$level,
                             levels = c("Very Poor", "Poor", "Fair", "Good", "Excellent"))

status_colors <- c(
  "Excellent" = "#2B8CBE",
  "Good"      = "#1a9850",
  "Fair"      = "#FFC107",
  "Poor"      = "#fc8d59",
  "Very Poor" = "#d73027"
)

f_3a <- ggplot(coef_wq_ext3, aes(x = level, y = estimate, color = level)) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "gray50") +
  geom_line(aes(group = 1), color = "black", linewidth = 0.6) +
  geom_point(size = 3) +
  geom_errorbar(
    data = subset(coef_wq_ext3, !is.na(lower)),
    aes(ymin = lower, ymax = upper),
    width = 0.15,
    linewidth = 0.8
  ) +
  scale_color_manual(values = status_colors) +
  scale_x_discrete(
    labels = c(
      "Very Poor" = "Very Poor (10–59)",
      "Poor"      = "Poor (60–79)",
      "Fair"      = "Fair (80–84)",
      "Good"      = "Good (85–89)",
      "Excellent" = "Excellent (90–100)"
    )
  ) +
  labs(
    x = "Water Quality Status",
    y = "Coefficient",
    title = "Projects (Board-funded)"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold"),
    legend.position = "none"
  )

ggsave("coefficient_plot_funded_projects.png", f_3a, width = 8, height = 5, dpi = 300)

# Voluntary - Projects 
# Non-Board-funded (m1sv_c) — Projects (non-Board-funded)
s1v <- summary(m1sv_c)

if(!is.null(s1v$coefficients) && ncol(s1v$coefficients) >= 2){
  coefmat_v <- as.data.frame(s1v$coefficients)
  names(coefmat_v)[1:4] <- c("estimate","se","stat","p")[1:min(4, ncol(coefmat_v))]
  coef_tidy_v <- coefmat_v %>%
    tibble::rownames_to_column("term") %>%
    dplyr::select(term, estimate, se, dplyr::everything())
} else {
  # fallback: model coef() and conventional vcov
  est_vec_v <- coef(m1sv_c)
  vcov_mat_v <- tryCatch(vcov(m1sv_c), error = function(e) NULL)
  if(is.null(vcov_mat_v)) stop("Could not extract vcov for m1sv_c.")
  se_vec_v <- sqrt(diag(vcov_mat_v))
  coef_tidy_v <- tibble(
    term = names(est_vec_v),
    estimate = as.numeric(est_vec_v),
    se = se_vec_v[names(est_vec_v)]
  )
}

coef_tidy_v <- coef_tidy_v %>%
  mutate(lower = estimate - 1.96 * se,
         upper = estimate + 1.96 * se)

coef_wq_v <- coef_tidy_v %>%
  filter(grepl("wqistatus", term, ignore.case = TRUE)) %>%
  mutate(
    level = sub(".*wqistatus\\)?[\\._: ]*", "", term, ignore.case = TRUE),
    level = gsub("^lag\\(|\\)$", "", level),
    level = trimws(level)
  ) %>%
  filter(level %in% c("Poor", "Fair", "Good", "Excellent")) %>%
  mutate(level = factor(level, levels = c("Poor", "Fair", "Good", "Excellent")))

if(nrow(coef_wq_v) == 0) stop("No wqistatus terms found in m1sv_c. Run print(coef_tidy_v$term) to inspect names.")

coef_wq_ext_v <- coef_wq_v %>%
  bind_rows(
    tibble(
      term = "Reference",
      estimate = 0,
      se = NA_real_,
      lower = NA_real_,
      upper = NA_real_,
      level = "Very Poor"
    )
  )

coef_wq_ext_v$level <- factor(coef_wq_ext_v$level,
                              levels = c("Very Poor", "Poor", "Fair", "Good", "Excellent"))

status_colors <- c(
  "Excellent" = "#2B8CBE",
  "Good"      = "#1a9850",
  "Fair"      = "#FFC107",
  "Poor"      = "#fc8d59",
  "Very Poor" = "#d73027"
)

f_3b <- ggplot(coef_wq_ext_v, aes(x = level, y = estimate, color = level)) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "gray50") +
  geom_line(aes(group = 1), color = "black", linewidth = 0.6) +
  geom_point(size = 3) +
  geom_errorbar(
    data = subset(coef_wq_ext_v, !is.na(lower)),
    aes(ymin = lower, ymax = upper),
    width = 0.15,
    linewidth = 0.8
  ) +
  scale_color_manual(values = status_colors) +
  scale_x_discrete(
    labels = c(
      "Very Poor" = "Very Poor (10–59)",
      "Poor"      = "Poor (60–79)",
      "Fair"      = "Fair (80–84)",
      "Good"      = "Good (85–89)",
      "Excellent" = "Excellent (90–100)"
    )
  ) +
  labs(
    x = "Water Quality Status",
    y = "Coefficient",
    title = "Projects (non-Board-funded)"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold"),
    legend.position = "none"
  )

ggsave("coefficient_plot_nonboardfunded_projects.png", f_3b, width = 8, height = 5, dpi = 300)
f_3b

# Participants (Board-funded) 
s_p_b <- summary(m2sm_c)

if(!is.null(s_p_b$coefficients) && ncol(s_p_b$coefficients) >= 2){
  coefmat_pb <- as.data.frame(s_p_b$coefficients)
  names(coefmat_pb)[1:4] <- c("estimate","se","stat","p")[1:min(4,ncol(coefmat_pb))]
  coef_tidy_pb <- coefmat_pb %>% tibble::rownames_to_column("term") %>% dplyr::select(term, estimate, se, dplyr::everything())
} else {
  est_pb <- coef(m2sm_c); vc_pb <- tryCatch(vcov(m2sm_c), error=function(e) NULL)
  if(is.null(vc_pb)) stop("Could not extract vcov for m2sm_c.")
  se_pb <- sqrt(diag(vc_pb))
  coef_tidy_pb <- tibble(term = names(est_pb), estimate = as.numeric(est_pb), se = se_pb[names(est_pb)])
}

coef_tidy_pb <- coef_tidy_pb %>%
  mutate(lower = estimate - 1.96*se, upper = estimate + 1.96*se)

coef_wq_pb <- coef_tidy_pb %>%
  filter(grepl("wqistatus", term, ignore.case = TRUE)) %>%
  mutate(level = sub(".*wqistatus\\)?[\\._: ]*", "", term, ignore.case = TRUE),
         level = gsub("^lag\\(|\\)$","", level),
         level = trimws(level)) %>%
  filter(level %in% c("Poor","Fair","Good","Excellent")) %>%
  mutate(level = factor(level, levels = c("Very Poor","Poor","Fair","Good","Excellent")))

if(nrow(coef_wq_pb) == 0) stop("No wqistatus terms found in m2sm_c. Run print(coef_tidy_pb$term) to inspect names.")

coef_wq_ext_pb <- coef_wq_pb %>%
  bind_rows(tibble(term="Reference", estimate=0, se=NA_real_, lower=NA_real_, upper=NA_real_, level="Very Poor"))

coef_wq_ext_pb$level <- factor(coef_wq_ext_pb$level, levels = c("Very Poor","Poor","Fair","Good","Excellent"))

status_colors <- c("Excellent"="#2B8CBE","Good"="#1a9850","Fair"="#FFC107","Poor"="#fc8d59","Very Poor"="#d73027")

p_pb <- ggplot(coef_wq_ext_pb, aes(x=level, y=estimate, color=level)) +
  geom_hline(yintercept=0, linetype="dashed", color="gray50") +
  geom_line(aes(group=1), color="black", linewidth=0.6) +
  geom_point(size=3) +
  geom_errorbar(data=subset(coef_wq_ext_pb, !is.na(lower)), aes(ymin=lower, ymax=upper), width=0.15, linewidth=0.8) +
  scale_color_manual(values = status_colors) +
  scale_x_discrete(labels = c("Very Poor"="Very Poor (10–59)","Poor"="Poor (60–79)","Fair"="Fair (80–84)","Good"="Good (85–89)","Excellent"="Excellent (90–100)")) +
  labs(x="Water Quality Status", y="Coefficient", title="Participants (Board-funded)") +
  theme_minimal(base_size=13) + theme(plot.title = element_text(hjust=0.5, face="bold"), legend.position="none")

ggsave("coef_participants_boardfunded.png", p_pb, width=8, height=5, dpi=300)

# Participants (non-Board-funded)
s_p_nb <- summary(m2sv_c)

if(!is.null(s_p_nb$coefficients) && ncol(s_p_nb$coefficients) >= 2){
  coefmat_pnb <- as.data.frame(s_p_nb$coefficients)
  names(coefmat_pnb)[1:4] <- c("estimate","se","stat","p")[1:min(4,ncol(coefmat_pnb))]
  coef_tidy_pnb <- coefmat_pnb %>% tibble::rownames_to_column("term") %>% dplyr::select(term, estimate, se, dplyr::everything())
} else {
  est_pnb <- coef(m2sv_c); vc_pnb <- tryCatch(vcov(m2sv_c), error=function(e) NULL)
  if(is.null(vc_pnb)) stop("Could not extract vcov for m2sv_c.")
  se_pnb <- sqrt(diag(vc_pnb))
  coef_tidy_pnb <- tibble(term = names(est_pnb), estimate = as.numeric(est_pnb), se = se_pnb[names(est_pnb)])
}

coef_tidy_pnb <- coef_tidy_pnb %>% mutate(lower = estimate - 1.96*se, upper = estimate + 1.96*se)

coef_wq_pnb <- coef_tidy_pnb %>%
  filter(grepl("wqistatus", term, ignore.case = TRUE)) %>%
  mutate(level = sub(".*wqistatus\\)?[\\._: ]*", "", term, ignore.case = TRUE),
         level = gsub("^lag\\(|\\)$","", level),
         level = trimws(level)) %>%
  filter(level %in% c("Poor","Fair","Good","Excellent")) %>%
  mutate(level = factor(level, levels = c("Very Poor","Poor","Fair","Good","Excellent")))

if(nrow(coef_wq_pnb) == 0) stop("No wqistatus terms found in m2sv_c. Run print(coef_tidy_pnb$term) to inspect names.")

coef_wq_ext_pnb <- coef_wq_pnb %>%
  bind_rows(tibble(term="Reference", estimate=0, se=NA_real_, lower=NA_real_, upper=NA_real_, level="Very Poor"))

coef_wq_ext_pnb$level <- factor(coef_wq_ext_pnb$level, levels = c("Very Poor","Poor","Fair","Good","Excellent"))

status_colors <- c("Excellent"="#2B8CBE","Good"="#1a9850","Fair"="#FFC107","Poor"="#fc8d59","Very Poor"="#d73027")

p_pnb <- ggplot(coef_wq_ext_pnb, aes(x=level, y=estimate, color=level)) +
  geom_hline(yintercept=0, linetype="dashed", color="gray50") +
  geom_line(aes(group=1), color="black", linewidth=0.6) +
  geom_point(size=3) +
  geom_errorbar(data=subset(coef_wq_ext_pnb, !is.na(lower)), aes(ymin=lower, ymax=upper), width=0.15, linewidth=0.8) +
  scale_color_manual(values = status_colors) +
  scale_x_discrete(labels = c("Very Poor"="Very Poor (10–59)","Poor"="Poor (60–79)","Fair"="Fair (80–84)","Good"="Good (85–89)","Excellent"="Excellent (90–100)")) +
  labs(x="Water Quality Status", y="Coefficient", title="Participants (non-Board-funded)") +
  theme_minimal(base_size=13) + theme(plot.title = element_text(hjust=0.5, face="bold"), legend.position="none")

ggsave("coef_participants_nonboardfunded.png", p_pnb, width=8, height=5, dpi=300)
p_pnb

# Total Expenditures (Board-funded)
s_e_b <- summary(m3sm_c)

if(!is.null(s_e_b$coefficients) && ncol(s_e_b$coefficients) >= 2){
  coefmat_eb <- as.data.frame(s_e_b$coefficients)
  names(coefmat_eb)[1:4] <- c("estimate","se","stat","p")[1:min(4,ncol(coefmat_eb))]
  coef_tidy_eb <- coefmat_eb %>% tibble::rownames_to_column("term") %>% dplyr::select(term, estimate, se, dplyr::everything())
} else {
  est_eb <- coef(m3sm_c); vc_eb <- tryCatch(vcov(m3sm_c), error=function(e) NULL)
  if(is.null(vc_eb)) stop("Could not extract vcov for m3sm_c.")
  se_eb <- sqrt(diag(vc_eb))
  coef_tidy_eb <- tibble(term = names(est_eb), estimate = as.numeric(est_eb), se = se_eb[names(est_eb)])
}

coef_tidy_eb <- coef_tidy_eb %>% mutate(lower = estimate - 1.96*se, upper = estimate + 1.96*se)

coef_wq_eb <- coef_tidy_eb %>%
  filter(grepl("wqistatus", term, ignore.case = TRUE)) %>%
  mutate(level = sub(".*wqistatus\\)?[\\._: ]*", "", term, ignore.case = TRUE),
         level = gsub("^lag\\(|\\)$","", level),
         level = trimws(level)) %>%
  filter(level %in% c("Poor","Fair","Good","Excellent")) %>%
  mutate(level = factor(level, levels = c("Very Poor","Poor","Fair","Good","Excellent")))

if(nrow(coef_wq_eb) == 0) stop("No wqistatus terms found in m3sm_c. Run print(coef_tidy_eb$term) to inspect names.")

coef_wq_ext_eb <- coef_wq_eb %>% bind_rows(tibble(term="Reference", estimate=0, se=NA_real_, lower=NA_real_, upper=NA_real_, level="Very Poor"))
coef_wq_ext_eb$level <- factor(coef_wq_ext_eb$level, levels = c("Very Poor","Poor","Fair","Good","Excellent"))

status_colors <- c("Excellent"="#2B8CBE","Good"="#1a9850","Fair"="#FFC107","Poor"="#fc8d59","Very Poor"="#d73027")

p_eb <- ggplot(coef_wq_ext_eb, aes(x=level, y=estimate, color=level)) +
  geom_hline(yintercept=0, linetype="dashed", color="gray50") +
  geom_line(aes(group=1), color="black", linewidth=0.6) +
  geom_point(size=3) +
  geom_errorbar(data=subset(coef_wq_ext_eb, !is.na(lower)), aes(ymin=lower, ymax=upper), width=0.15, linewidth=0.8) +
  scale_color_manual(values = status_colors) +
  scale_x_discrete(labels = c("Very Poor"="Very Poor (10–59)","Poor"="Poor (60–79)","Fair"="Fair (80–84)","Good"="Good (85–89)","Excellent"="Excellent (90–100)")) +
  labs(x="Water Quality Status", y="Coefficient", title="Total Expenditures (Board-funded)") +
  theme_minimal(base_size=13) + theme(plot.title = element_text(hjust=0.5, face="bold"), legend.position="none")

ggsave("coef_expenditures_boardfunded.png", p_eb, width=8, height=5, dpi=300)
p_eb

# Total Expenditures (non-Board-funded)
s_e_nb <- summary(m3sv_c)

if(!is.null(s_e_nb$coefficients) && ncol(s_e_nb$coefficients) >= 2){
  coefmat_enb <- as.data.frame(s_e_nb$coefficients)
  names(coefmat_enb)[1:4] <- c("estimate","se","stat","p")[1:min(4,ncol(coefmat_enb))]
  coef_tidy_enb <- coefmat_enb %>% tibble::rownames_to_column("term") %>% dplyr::select(term, estimate, se, dplyr::everything())
} else {
  est_enb <- coef(m3sv_c); vc_enb <- tryCatch(vcov(m3sv_c), error=function(e) NULL)
  if(is.null(vc_enb)) stop("Could not extract vcov for m3sv_c.")
  se_enb <- sqrt(diag(vc_enb))
  coef_tidy_enb <- tibble(term = names(est_enb), estimate = as.numeric(est_enb), se = se_enb[names(est_enb)])
}

coef_tidy_enb <- coef_tidy_enb %>% mutate(lower = estimate - 1.96*se, upper = estimate + 1.96*se)

coef_wq_enb <- coef_tidy_enb %>%
  filter(grepl("wqistatus", term, ignore.case = TRUE)) %>%
  mutate(level = sub(".*wqistatus\\)?[\\._: ]*", "", term, ignore.case = TRUE),
         level = gsub("^lag\\(|\\)$","", level),
         level = trimws(level)) %>%
  filter(level %in% c("Poor","Fair","Good","Excellent")) %>%
  mutate(level = factor(level, levels = c("Very Poor","Poor","Fair","Good","Excellent")))

if(nrow(coef_wq_enb) == 0) stop("No wqistatus terms found in m3sv_c. Run print(coef_tidy_enb$term) to inspect names.")

coef_wq_ext_enb <- coef_wq_enb %>% bind_rows(tibble(term="Reference", estimate=0, se=NA_real_, lower=NA_real_, upper=NA_real_, level="Very Poor"))
coef_wq_ext_enb$level <- factor(coef_wq_ext_enb$level, levels = c("Very Poor","Poor","Fair","Good","Excellent"))

status_colors <- c("Excellent"="#2B8CBE","Good"="#1a9850","Fair"="#FFC107","Poor"="#fc8d59","Very Poor"="#d73027")

p_enb <- ggplot(coef_wq_ext_enb, aes(x=level, y=estimate, color=level)) +
  geom_hline(yintercept=0, linetype="dashed", color="gray50") +
  geom_line(aes(group=1), color="black", linewidth=0.6) +
  geom_point(size=3) +
  geom_errorbar(data=subset(coef_wq_ext_enb, !is.na(lower)), aes(ymin=lower, ymax=upper), width=0.15, linewidth=0.8) +
  scale_color_manual(values = status_colors) +
  scale_x_discrete(labels = c("Very Poor"="Very Poor (10–59)","Poor"="Poor (60–79)","Fair"="Fair (80–84)","Good"="Good (85–89)","Excellent"="Excellent (90–100)")) +
  labs(x="Water Quality Status", y="Coefficient", title="Total Expenditures (non-Board-funded)") +
  theme_minimal(base_size=13) + theme(plot.title = element_text(hjust=0.5, face="bold"), legend.position="none")

ggsave("coef_expenditures_nonboardfunded.png", p_enb, width=8, height=5, dpi=300)
p_enb